# -*- coding: utf-8 -*-
# Python 3.8.3
"""
弗利尔美术馆中国藏品分类信息爬虫
"""
import requests
import json
import os
from fake_useragent import UserAgent    # 生成随机请求头
import csv


class Freersackler_China:
    """Crawler for the Freer|Sackler (asia.si.edu) search API.

    Collects object codes for Chinese items in the "Weapon and Armament"
    classification (term id 1494) and appends them as JSON fragments to a
    local file.
    """

    def __init__(self):
        # Search endpoint of the museum's WordPress JSON API.
        self.url = 'https://asia.si.edu/wp-json/fsep/v1/search'
        # Random request header. `.random` is the documented way to obtain a
        # User-Agent *string*; str() on the UserAgent object only yields its
        # repr, which is not a valid UA value.
        self.headers = {'user-agent': UserAgent(path="fake_useragent.json").random}

    def get_objects(self):
        """Fetch search-result pages 1-14 and record every object found.

        Each page is requested up to three times on network errors; a page
        that still fails after three attempts is skipped.  Results are
        appended to the output file as ``"code": "category",`` fragments
        (braces stripped) so they can later be wrapped into one JSON object.
        """
        # Raw string: same path as before, without relying on '\A', '\s',
        # '\c' happening not to be escape sequences.
        out_path = r"D:\A\spider\cat\weapon.json"
        for page in range(1, 15):
            print("第{}页".format(str(page)))
            # Query: search term "China", classification id 1494, current page.
            payload = {"query_vars": {"s": "China",
                                      "terms": "{\"classification[]\":\"1494\"}",
                                      "paged": page,
                                      "sort": "score",
                                      "view": "gallery"}}
            # At most three attempts per page (replaces the sentinel-value
            # state machine `error_count = 4` of the original).
            for attempt in range(1, 4):
                try:
                    reqs = requests.post(url=self.url, json=payload,
                                         headers=self.headers, timeout=60).json()
                except requests.exceptions.RequestException:
                    print("getlink error {}times".format(str(attempt)))
                    continue
                # Iterate over the posts the API actually returned instead of
                # assuming a fixed 21 per page: the original indexed
                # reqs["posts"][j] for j in range(21), so a short final page
                # raised an uncaught IndexError and aborted the whole crawl.
                # Open the file once per page instead of once per item; append
                # mode keeps the output identical.
                with open(out_path, "a") as f:
                    for post in reqs["posts"]:
                        object_code = str(post["permalink"]).replace(
                            "https://asia.si.edu/object/", "")
                        entry = {object_code: "Weapon and Armament"}
                        f.write(str(json.dumps(entry)).replace("{", "").replace("}", "") + ",")
                break  # page handled successfully — stop retrying

    def run(self):
        """Entry point: crawl all configured result pages."""
        self.get_objects()


if __name__ == '__main__':
    # Script entry point: build the crawler and start scraping.
    crawler = Freersackler_China()
    crawler.run()
